* Summary stats
* 3/21/2025

clear all
set more off
set seed 1515

program main
	summary_table
	histogram_cap_start_years
	time_series_sc
	time_series_state
	other_snap_policies_timeseries
end

program summary_table
	sc_table 
	state_table
			
	matrix full_table = state_table, sc_table
	
	esttab matrix(full_table, fmt(%12.3fc)), ////
		label nomtitle nonote collabels(none) booktabs compress
	
	esttab matrix(full_table, fmt(%12.3fc)) ///
		using "$output/all_sum", replace  ///
		label nomtitle nonote collabels(none) booktabs compress
end

program sc_table
	use "$for_analysis/sc_for_regressions_actual_ssi", clear
	gen_covariates // create and label additional covariates
	
	* generate table	
	foreach cov in snap inctot poverty age black hispanic female ///
		ed_1 ed_2 ed_3 ed_4 disabl urban {
			* means of treated and control
			su `cov' if treat == 1 & post == 0 [aw = perwt], meanonly
			if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
				local rounded_mean = round(`r(mean)')
				mat `cov'_t = `rounded_mean'
			}
			else {
				mat `cov'_t = `r(mean)'
			}
			local N_t = `r(N)'
			su `cov' if treat == 0 & post == 0 [aw = perwt], meanonly
			if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
				local rounded_mean = round(`r(mean)')
				mat `cov'_c = `rounded_mean'
			}
			else {
				mat `cov'_c = `r(mean)'
			}
			local N_c = `r(N)'
			
			* test for difference between treated and control
			reg `cov' i.treat if post == 0 [aw = perwt], r
			if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
				local rounded_coef = round(_b[1.treat])
				local rounded_se = round(_se[1.treat])
				mat `cov'_diff = `rounded_coef', `rounded_se'
			}
			else {
				matrix `cov'_diff = _b[1.treat], _se[1.treat]
			}
			matrix `cov' = `cov'_t, `cov'_c, `cov'_diff
			local N_diff = `e(N)'
			
			local this = strtoname("`cov'") 
			local rows `rows' `this'
	}
	matrix sc_table = snap \ inctot \ poverty \ age \ black \ hispanic \ female \ ///
		ed_1 \ ed_2 \ ed_3 \ ed_4 \ disabl \ urban
	mat rownames sc_table = `rows'
	mat N_sc = `N_t', `N_c', `N_diff', .
	mat rownames N_sc = "N"
	mat sc_table = sc_table \ N_sc
end		

program gen_covariates
	gen black = (race == 2)
	gen hispanic = (hispan != 0)
	gen female = (sex == 2)
	tab edcat, gen(ed_)
	gen urban = (metro >= 2 & metro != .)
	gen disabl = (diffphys == 2)
	
	lab var snap "SNAP Enrollment"
	lab var inctot "Personal Income"
	lab var poverty "Poverty Status"
	lab var black "Black"
	lab var hispanic "Hispanic"
	lab var female "Female"
	lab var ed_1 "HS Dropout"
	lab var ed_2 "HS Graduate"
	lab var ed_3 "Some College"
	lab var ed_4 "College Graduate"
	lab var age "Age"
	lab var urban "Lives in Metro Area"
	lab var disabl "Disability"
end

program state_table
	set seed 1515 // re-set same seed to ensure reproducibility if only this program is run
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	gen_covariates // create and label additional covariates
	
	* generate indicator for ever treated state
	bys statefip: egen evertreat = max(inter)
	gen ever_treated = (evertreat > 0)

	* generate tables: treated group
	preserve
		keep if ever_treated == 1 & inter == 0 // in treated group but before treatment
		tab year
		
		* table for treated
		foreach cov in snap inctot poverty age black hispanic female ///
			ed_1 ed_2 ed_3 ed_4 disabl urban {
				su `cov' [aw = perwt], meanonly
				if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
					local rounded_mean = round(`r(mean)')
					mat `cov'_t = `rounded_mean'
				}
				else {
					mat `cov'_t = `r(mean)'
				}
				local N_t = `r(N)'
		}
		matrix state_t = snap_t \ inctot_t \ poverty_t \ age_t \ black_t \ hispanic_t \ female_t \ ///
			ed_1_t \ ed_2_t \ ed_3_t \ ed_4_t \ disabl_t \ urban_t
		save "$for_analysis/treated", replace
		
		* store the share in each year, to match this distribution among control:
		gen obs = 1
		egen total_treated = sum(obs)
		bys year: egen year_treated = sum(obs)
		gen treated_share = .
		levelsof year, local(years)
		foreach y of local years {
			gen share`y' = year_treated / total_treated if year == `y'
			egen share`y'_filled = max(share`y')
			local share`y' = share`y'_filled[1]
			replace treated_share = `share`y'' if year == `y'
		}
		keep year treated_share total_treated
		duplicates drop
		save "$for_analysis/treated_shares", replace
	restore
	
	* control group
	keep if ever_treated == 0
	merge m:1 year using "$for_analysis/treated_shares", ///
		assert(1 3) keep(3) nogen // _merge = 1 is post-2010 years: not pre-treatment years for any treated states
	tab year
	gen obs = 1
	egen controls_total = sum(obs)
	bys year: egen controls_year = sum(obs) 
	
	preserve
		keep year treated_share controls_total controls_year total_treated
		duplicates drop
		gen ratio = controls_year / treated_share // determines maximum number of controls that can be kept
		qui su ratio, d
		gen constrained_opt_controls = round(`r(min)') + 750 // allow for tolerance of small number of people
		li
		drop ratio
		save "$for_analysis/control_shares", replace
	restore
	
	cap drop treated_share controls_total controls_year
	merge m:1 year using "$for_analysis/control_shares", assert(3) keep(3) nogen
	sort sample serial pernum
	gen random = runiform(0,1)
	bys year (random): gen num_control = _n
	keep if num_control <= (treated_share * constrained_opt_controls)
	drop random
	tab year
	gen control_to_keep = 1
	save "$for_analysis/control", replace
	
	* table for control
	foreach cov in snap inctot poverty age black hispanic female ///
		ed_1 ed_2 ed_3 ed_4 disabl urban {
			su `cov' [aw = perwt], meanonly
			if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
				local rounded_mean = round(`r(mean)')
				mat `cov'_c = `rounded_mean'
			}
			else {
				mat `cov'_c = `r(mean)'
			}
			local N_c = `r(N)'
	}
	matrix state_c = snap_c \ inctot_c \ poverty_c \ age_c \ black_c \ hispanic_c \ female_c \ ///
		ed_1_c \ ed_2_c \ ed_3_c \ ed_4_c \ disabl_c \ urban_c
	
	use "$for_analysis/treated", clear
	append using "$for_analysis/control"
	
	* difference between treated and control
	foreach cov in snap inctot poverty age black hispanic female ///
		ed_1 ed_2 ed_3 ed_4 disabl urban {
			reg `cov' i.ever_treated ///
				if ((ever_treated == 1 & inter == 0) | (ever_treated == 0 & control_to_keep == 1)) ///
				[aw = perwt], r
			if inlist("`cov'", "inctot", "poverty") { // round dollars and percentages
				local rounded_coef = round(_b[1.ever_treated])
				local rounded_se = round(_se[1.ever_treated])
				mat `cov'_diff = `rounded_coef', `rounded_se'
			}
			else {
				matrix `cov'_diff = _b[1.ever_treated], _se[1.ever_treated]
			}
			local N_diff = `e(N)'
			
			local this = strtoname("`cov'") 
			local rows `rows' `this'
	}
	matrix state_diff = snap_diff \ inctot_diff \ poverty_diff ///
		\ age_diff \ black_diff \ hispanic_diff \ female_diff \ ///
		ed_1_diff \ ed_2_diff \ ed_3_diff \ ed_4_diff \ disabl_diff \ urban_diff
	matrix state_table = state_t, state_c, state_diff
		matrix rownames state_table = `rows'
	mat N_state = `N_t', `N_c', `N_diff', .
	mat rownames N_state = "N"
	mat state_table = state_table \ N_state
		
	cap erase "$for_analysis/treated.dta"
	cap erase "$for_analysis/treated_shares.dta"
	cap erase "$for_analysis/control.dta"
	cap erase "$for_analysis/control_shares.dta"
end

program histogram_cap_start_years // histogram of CAP adoption years
	* generate file of treated states with their start years and CAP formats
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	bys statefip: gen obs = _n 
	keep if obs == 1 & !mi(startyear)
	gen fips = statefip
	keep fips statefip startyear standard
	ren (startyear standard) (year smflag)
	replace smflag = 2 if smflag == 0
	sort year
	gen format = "Standard" if smflag == 1
	replace format = "Modified" if smflag == 2
	decode statefip, gen(stateString) 
	gen state = strproper(stateString)
	keep state fips year smflag format
	order state fips year smflag format

	save "$intermediate/state_cap_sm_year", replace
	export delimited "$intermediate/state_cap_sm_year.csv", replace
	 
	ren year startyear
	gcollapse (mean) startyear, by(fips)
	keep if startyear != .
	
	hist startyear, d ylabel(, angle(0) labsize(medsmall)) ylabel(0(1)4) ///
		graphregion(color(white)) xtitle("Year") xlab(#10) ///
		color(midblue%60) freq xscale(titlegap(2)) ytitle("Number of States that Adopted the CAP")
	graph export "$output/hist_startyears.png", replace
end

program time_series_sc  // SNAP enrollment over time: singles-couples design
	use "$for_analysis/sc_for_regressions_actual_ssi", clear
	
	gen relative_year = year - startyear  // different from relyr because needs to be defined for controls too
	keep if inrange(relative_year, -5, 5)
	gen snap_single = snap if single == 1
	gen snap_couple = snap if single == 0
	cou
	di "sc sample size: `r(N)'"
	gcollapse (mean) snap_single snap_couple [w = perwt], by(relative_year)
	lab var relative_year "Years Since CAP Adopted"
	
	tsset relative_year
	twoway (tsline snap_single, recast(connected) msymbol(O) mcolor(blue%50) lcolor(blue%50)) ///
		(tsline snap_couple, recast(connected) msymbol(D) mcolor(red%50) lcolor(red%50)), ysc(range(0.05 0.35)) ///
		ylab(#6, angle(0)) xsc(range(-5 5)) xscale(titlegap(2)) ///
		xlab(-5(1)5) graphregion(fcolor(white) lcolor(white) lwidth(vvvthin)) xline(-1, lp(dash) lcolor(gs12)) ///
		legend(order(1 "Singles" 2 "Couples") nobox region(lstyle(none))) ytitle("Proportion Enrolled in SNAP")
	graph export "$output/timeseries_sc.png", replace
end

program time_series_state
	set seed 1515 // re-set same seed to ensure reproducibility if only this program is run
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	
	* assign fake start years for control observations to match the treatment distribution
	levelsof year, local(years)
	foreach y of local years {
		qui assign_control_years, year(`y')
	}
	
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	levelsof year, local(years)
	foreach y of local years {
		qui merge 1:1 serial pernum year ///
			using "$for_analysis/controls_assigned_startyears`y'", ///
			assert(1 3 4) keep(1 3 4) nogen update
		cap erase "$for_analysis/controls_assigned_startyears`y'.dta"
	}

	* generate variables for graphing
	gen treated = !mi(startyear)
	replace startyear = startyear_c if mi(startyear)
	cap drop relyr*
	gen relyr = year - startyear
	replace relyr = -6 if relyr < -6  // bin large relative years
	replace relyr = 6 if relyr > 6
	keep if inrange(rely, -5, 5)
	lab var relyr "Years Since CAP Adopted"

	gen snap_treated = snap if treated == 1
	gen snap_control = snap if treated == 0
	cou
	di "state sample size: `r(N)'"
	gcollapse (mean) snap_treated snap_control [w = perwt], by(relyr)
		
	tsset relyr
	twoway (tsline snap_treated, recast(connected) msymbol(O) mcolor(blue%50) lcolor(blue%50)) ///
		(tsline snap_control, recast(connected) msymbol(D) mcolor(red%50) lcolor(red%50)), ysc(range(0.1 0.35)) ///
		ylab(0(0.1)0.6, angle(0)) xsc(range(-5 5)) xscale(titlegap(2)) ///
		xlab(-5(1)5) graphregion(fcolor(white) lcolor(white) lwidth(vvvthin)) xline(-1, lp(dash) lcolor(gs12)) ///
		legend(order(1 "Treated States" 2 "Control States") nobox region(lstyle(none))) ytitle("Proportion Enrolled in SNAP")
	graph export "$output/timeseries_state.png", replace
end

program assign_control_years
	syntax, year(int)
	
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	qui keep if year == `year'  // extract populations for that year
	gen persons = 1
	gcollapse (sum) pop_`year' = persons, by(startyear)

	* share treated in each year
	egen treated_pop = sum(pop_`year') if !mi(startyear)
	bys startyear: egen treated_year_pop = sum(pop_`year') if !mi(startyear)
	gen treated_share = treated_year_pop / treated_pop
	qui keep if !mi(startyear)
	keep startyear treated_share
	sort startyear
	gen cumulative_share = sum(treated_share)
	di "treatment distribution, year `year'"
	li startyear treated_share // check the distribution

	qui levelsof startyear, local(years)
	local i = 0
	foreach y of local years {
		local i = `i' + 1
		preserve
			qui keep if startyear == `y'
			local cumulative`i' = cumulative_share[1]
			local startyear`i' = `y'
		restore
	}
	local total_years = `i'

	* assign start years for controls to match population-weighted treatment distribution
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	gen persons = 1
	qui keep if mi(startyear) & year == `year'  // controls in that year
	keep serial pernum statefip year persons
	
	sort serial pernum
	gen random = runiform(0,1)
	sort random
	egen total_pop = sum(persons)
	gen cumulative_pop = sum(persons)
	gen cumulative_share = cumulative_pop / total_pop

	gen startyear = .
	forval i = 1/`total_years' {
		qui replace startyear = `startyear`i'' if mi(startyear) & cumulative_share <= `cumulative`i''
	}
	preserve // check distribution
		bys startyear: egen control_pop_startyear = sum(persons)
		qui replace control_pop_startyear = control_pop_startyear / total_pop
		gcollapse (mean) control_pop_startyear, by(startyear)
		di "control distribution, year `year'"
		li
	restore
	assert !mi(startyear)
	keep serial pernum year startyear
	ren startyear startyear_control
	save "$for_analysis/controls_assigned_startyears`year'", replace
end

program other_snap_policies_timeseries
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	gen treat = (startyear != .)

	* other SNAP policies
	de has_*, f
	egen num_policies = rowtotal(has_*)
	keep statefip year treat num_policies 
	duplicates drop
	gisid statefip year

	gcollapse (mean) num_policies, by(treat year)
	gen num_treated = num_policies if treat == 1
	gen num_control = num_policies if treat == 0
	gcollapse (firstnm) num_treated num_control, by(year)

	tsset year
	twoway (tsline num_treated, recast(connected) msymbol(O) mcolor(blue%50) lcolor(blue%50)) ///
		(tsline num_control, recast(connected) msymbol(D) mcolor(red%50) lcolor(red%50)), ysc(range(0 5)) ///
		ylab(0(1)5, angle(0)) xsc(range(2000 2016)) xscale(titlegap(2)) xtitle("Year") ///
		xlab(2000(2)2016) graphregion(fcolor(white) lcolor(white) lwidth(vvvthin)) xline(0, lp(dash) lcolor(gs12)) ///
		legend(order(1 "Treated States" 2 "Control States") nobox region(lstyle(none))) ytitle("Mean Number of Policies")
	graph export "$output/other_policies.png", replace
end


* Execute
main
